diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 2a45a8f8..c7f6f8cd 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6581,7 +6581,9 @@ class Dataset(
         attrs = self._attrs if keep_attrs else None
         return self._replace_with_new_dims(variables, attrs=attrs)
 
-    def _binary_op(self, other, f, reflexive=False, join=None) -> Dataset:
+    def _binary_op(self, other, f, reflexive=False, join=None, keep_attrs=None) -> Dataset:
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
         from xarray.core.dataarray import DataArray
         from xarray.core.groupby import GroupBy
 
@@ -6591,7 +6593,9 @@ class Dataset(
         if isinstance(other, (DataArray, Dataset)):
             self, other = align(self, other, join=align_type, copy=False)  # type: ignore[assignment]
         g = f if not reflexive else lambda x, y: f(y, x)
-        ds = self._calculate_binary_op(g, other, join=align_type)
+        ds = self._calculate_binary_op(g, other, join=align_type, keep_attrs=keep_attrs)
+        if keep_attrs:
+            ds._copy_attrs_from(self)
         return ds
 
     def _inplace_binary_op(self: T_Dataset, other, f) -> T_Dataset:
@@ -6619,7 +6623,7 @@ class Dataset(
         return self
 
     def _calculate_binary_op(
-        self, f, other, join="inner", inplace: bool = False
+        self, f, other, join="inner", inplace: bool = False, keep_attrs: bool = False
     ) -> Dataset:
         def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars):
             if inplace and set(lhs_data_vars) != set(rhs_data_vars):
@@ -6646,7 +6650,7 @@ class Dataset(
             new_data_vars = apply_over_both(
                 self.data_vars, other, self.data_vars, other
             )
-            return type(self)(new_data_vars)
+            return type(self)(new_data_vars, attrs=self._attrs if keep_attrs else None)
 
         other_coords: Coordinates | None = getattr(other, "coords", None)
         ds = self.coords.merge(other_coords)
@@ -6660,6 +6664,8 @@ class Dataset(
             new_vars = {k: f(self.variables[k], other_variable) for k in self.data_vars}
         ds._variables.update(new_vars)
         ds._dims = calculate_dimensions(ds._variables)
+        if keep_attrs:
+            ds._attrs = self._attrs
         return ds
 
     def _copy_attrs_from(self, other):
